import os
import argparse
import prompts
import cv2
import csv
import json
import spacy
import re
from typing import Dict, List, Optional
import cloudgpt_api
import time

def parser_args():
    """
    Declare every command-line option of this script and parse the current
    command line.

    Returns the `argparse.Namespace` produced by `parse_args()`.
    """
    # Every public name exported by the `prompts` module is a selectable prompt.
    prompt_choices = [f'prompts.{name}' for name in vars(prompts) if '__' not in name]

    # Engines accepted by --openai_engine.
    engine_choices = [
        "gpt-35-turbo-20220309",
        "gpt-35-turbo-16k-20230613",
        "gpt-35-turbo-20230613",
        "gpt-35-turbo-1106",
        "gpt-4-20230321",
        "gpt-4-20230613",
        "gpt-4-32k-20230321",
        "gpt-4-32k-20230613",
        "gpt-4-1106-preview",
        "gpt-4-0125-preview",
        "gpt-4-visual-preview",
        "gpt-4-turbo-20240409",
        "gpt-4o-20240513",
        "gpt-4o-20240806",
        "gpt-4o-20241120",
        "gpt-4o-mini-20240718",
    ]

    cli = argparse.ArgumentParser('')

    # Preloading and input locations.
    cli.add_argument(
        "--preload",
        nargs='+',
        type=str,
        default=['img_features', 'captions', 'mods'],
        help='List of properties to preload is computed once before.',
    )
    cli.add_argument(
        "--preload_path",
        nargs='+',
        type=str,
        default=r"./",
        help='preload file path.',
    )
    cli.add_argument(
        "--dataset-path",
        type=str,
        default=r"./annotations",
        help="Path to the dataset",
    )
    cli.add_argument(
        "--input_csv",
        help="Path to the original CSV file.",
        default=r".\final_outputs.csv",
    )

    # LLM prompt and engine selection.
    cli.add_argument(
        "--gpt_prompt",
        default='prompts.personalization_ego4d_analysis_prompts',
        type=str,
        choices=prompt_choices,
        help='Denotes the base prompt to use alongside GPT4V. Has to be available in prompts.py',
    )
    cli.add_argument(
        "--openai_engine",
        default='gpt-4o-20241120',
        type=str,
        choices=engine_choices,
        help='Openai LLM Engine to use.',
    )

    # Output locations.
    cli.add_argument(
        "--output_pth",
        help="Path to output CSV with new column.",
        default=r"./egpmemory",
    )
    cli.add_argument(
        "--attributes_json",
        default=r".\final_global_attributes.json",
        help="Path to save the final global attribute sets (JSON).",
    )

    return cli.parse_args()

# The command line is parsed as soon as this module is executed or imported;
# the `__main__` section at the bottom of the file reads its paths and the
# engine name from this module-level `args`.
args = parser_args()

# Maps each human-readable attribute category (used as the key into the
# global attribute sets) to the JSON key looked up in the model's response
# and in the stored attribute JSON.
FIELD_MAP = {
    "Major category": "major_category",
    "Subcategory": "subcategory",
    "Color": "color",
    "Shape": "shape",
    "Material": "material",
    "Texture": "texture",
    "Size": "size",
    "Brand": "brand",
    "Style": "style",
    "Pattern": "pattern",
    "Feature": "feature",
    "Usage": "usage",
    "Status": "status"
}

def get_mid_frame_as_np_array(video_path: str, start_frame: int, num_frames: int):
    """
    Read the frame in the middle of a short segment of a video.

    Args:
        video_path: Path of the video file to open.
        start_frame: Index of the first frame of the segment.
        num_frames: Length of the segment in frames; the frame that is read is
            `start_frame + num_frames // 2`.

    Returns:
        The decoded frame as returned by `cv2.VideoCapture.read()` (a BGR
        NumPy array), or None when the file is missing, cannot be opened,
        reports no frames, the midpoint lies outside the video, or the read
        fails. Every failure prints a `[WARN]` line.
    """
    if not os.path.exists(video_path):
        print(f"[WARN] Video file not found: {video_path}")
        return None

    cap = cv2.VideoCapture(video_path)
    # try/finally guarantees the capture handle is released on every path,
    # including unexpected exceptions raised by OpenCV.
    try:
        if not cap.isOpened():
            print(f"[WARN] Could not open video: {video_path}")
            return None

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames < 1:
            print(f"[WARN] No frames found in {video_path}.")
            return None

        # Calculate the midpoint of the specified narrow segment
        midpoint_frame = start_frame + num_frames // 2

        # A negative index is just as invalid as one past the end; seeking to
        # it would silently read some other frame.
        if midpoint_frame < 0:
            print(f"[WARN] Calculated midpoint_frame {midpoint_frame} is negative.")
            return None
        if midpoint_frame >= total_frames:
            print(f"[WARN] Calculated midpoint_frame {midpoint_frame} exceeds total frames {total_frames}.")
            return None

        cap.set(cv2.CAP_PROP_POS_FRAMES, midpoint_frame)
        ret, frame = cap.read()
    finally:
        cap.release()

    if not ret or frame is None:
        print(f"[WARN] Could not read frame {midpoint_frame} from {video_path}")
        return None

    return frame  # BGR np array

def detect_object_attributes(api_client,
                             object_sentence: str,
                             image_data_url,
                             engine: str,
                             global_attrs: Dict[str, set]) -> Dict[str, str]:
    """
    Ask the vision model for a structured set of attributes of the object
    mentioned in `object_sentence`, and record newly seen values.

    Args:
        api_client: Object exposing `openai_completion_vision_CoT(sys_prompt=,
            user_prompt=, image=, engine=)`; its return value is expected to
            be the response text.
        object_sentence: Sentence describing the interaction with the object.
        image_data_url: Image passed through unchanged to the API client.
        engine: Engine name passed through unchanged to the API client.
        global_attrs: Mapping of category name (a `FIELD_MAP` key) to the
            set (or list) of values seen so far. Updated in place; a missing
            category is created as an empty set.

    Returns:
        The parsed attribute dictionary, e.g.
          {
            "major_category": "...",
            "subcategory": "...",
            ...
          }
        or `{}` when the API call fails or the response is not a JSON object.
    """
    # 1) Build system prompt
    sys_prompt = (
        "You are a helpful vision assistant that identifies object attributes in a structured JSON format. "
        "Ensure your output is valid JSON with exactly the specified top-level keys."
    )

    # 2) Build user prompt (based on object_sentence + known attributes)
    user_prompt = build_user_prompt(object_sentence, global_attrs)

    # 3) Make the API call. Any client failure is reported and treated as
    #    "no attributes" so one bad call does not abort a long run.
    try:
        response_str = api_client.openai_completion_vision_CoT(
            sys_prompt=sys_prompt,
            user_prompt=user_prompt,
            image=image_data_url,
            engine=engine
        )
    except Exception as e:
        print(f"[WARN] API call failed: {e}")
        return {}

    if not isinstance(response_str, str):
        print(f"[WARN] Unexpected response type from API: {type(response_str).__name__}")
        return {}

    # 4) Parse JSON from GPT: drop <Response> tags, then unwrap a markdown
    #    code fence (with or without a "json" tag) wherever it appears.
    cleaned_str = response_str.replace('<Response>', '').replace('</Response>', '').strip()
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", cleaned_str, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        cleaned_str = fenced.group(1).strip()

    try:
        attributes = json.loads(cleaned_str)
    except json.JSONDecodeError:
        print(f"[WARN] Could not parse JSON from response: {cleaned_str[:300]} ...")
        return {}

    if not isinstance(attributes, dict):
        print(f"[WARN] Expected a JSON object but got {type(attributes).__name__}: {cleaned_str[:300]} ...")
        return {}

    # 5) Record every new, meaningful value in the global attribute sets.
    placeholders = ("unknown", "none", "other")
    for cat_name, json_key in FIELD_MAP.items():
        val = attributes.get(json_key, "")
        # The model sometimes returns several values for one key as a list.
        candidates = val if isinstance(val, (list, tuple)) else [val]
        known = global_attrs.setdefault(cat_name, set())

        for item in candidates:
            # Only strings can be joined into the prompt later on.
            if not isinstance(item, str):
                continue
            lowered = item.strip().lower()
            # Skip empty values and placeholders, including phrases such as
            # "unknown brand".
            if not lowered or lowered in placeholders or "unknown" in lowered:
                continue
            if isinstance(known, set):
                known.add(item)
            elif isinstance(known, list) and item not in known:
                known.append(item)

    return attributes

def compute_attribute_similarity(
    target_attrs_json: str,  # e.g. a JSON string from the CSV
    candidate_attrs: Dict[str, str],
    target_object: str,  # Object from the CSV row, e.g., "bottle"
    candidate_object: str  # Object from the narration or candidate attributes
) -> float:
    """
    Score how closely a candidate's attributes match the target's attributes.

    Args:
        target_attrs_json: JSON object (as a string) holding the target
            attributes, keyed by the values of `FIELD_MAP`.
        candidate_attrs: Attribute dictionary of the candidate, same keys.
        target_object: Name of the target object.
        candidate_object: Name of the candidate object.

    Returns:
        The fraction of `FIELD_MAP` fields whose values are equal after
        lower-casing and stripping, ignoring empty values and the placeholders
        "unknown", "none" and "other". Returns 0.0 when the object names
        differ (case/whitespace-insensitive), or when either attribute set is
        missing, unparsable, or not a mapping.
    """
    # `or ""` lets a missing (None) object name compare as empty instead of crashing.
    if str(target_object or "").lower().strip() != str(candidate_object or "").lower().strip():
        # If the objects do not match, return 0.0 similarity score
        return 0.0

    if not target_attrs_json:
        return 0.0

    try:
        target_attrs = json.loads(target_attrs_json)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-string input.
        return 0.0

    # Nothing can match against an empty or non-object target / candidate.
    if not isinstance(target_attrs, dict) or not target_attrs:
        return 0.0
    if not isinstance(candidate_attrs, dict):
        return 0.0

    max_possible = len(FIELD_MAP)
    if max_possible == 0:
        return 0.0

    ignored = ("", "unknown", "none", "other")
    score = 0
    for json_key in FIELD_MAP.values():
        # str() keeps non-string values (None, numbers, lists) from crashing;
        # a JSON null becomes "none" and is therefore ignored.
        cand_val = str(candidate_attrs.get(json_key, "")).lower().strip()
        targ_val = str(target_attrs.get(json_key, "")).lower().strip()

        if cand_val not in ignored and cand_val == targ_val:
            score += 1

    return float(score) / float(max_possible)

def create_full_clip(original_video_path: str, base_output_dir: str, start_frame: int, num_frames: int, original_video_uid: str) -> Optional[str]:
    """
    Cut `num_frames` frames, beginning at `start_frame`, out of the original
    video and store them as an MP4 inside `base_output_dir`.

    The file is named `source_<original_video_uid>_<first>_<last>.mp4`, where
    `<last>` is `start_frame + num_frames - 1`. An existing file with that
    name is overwritten.

    Returns the path of the clip, or None when `save_subclip_from_frames`
    reports failure.
    """
    last_frame = start_frame + num_frames - 1
    clip_path = os.path.join(
        base_output_dir,
        f"source_{original_video_uid}_{start_frame}_{last_frame}.mp4",
    )

    # The destination folder must exist before the writer is opened.
    os.makedirs(os.path.dirname(clip_path), exist_ok=True)

    written = save_subclip_from_frames(
        input_video_path=original_video_path,
        output_video_path=clip_path,
        start_frame=start_frame,
        end_frame=last_frame,
        overwrite=True
    )
    if not written:
        return None
    return clip_path

def build_user_prompt(object_sentence: str, global_attrs_dict: dict) -> str:
    """
    Compose the user prompt sent to the model.

    The prompt quotes `object_sentence`, lists the already-known values of
    every category in `global_attrs_dict` (sorted, or "none" when a category
    is empty), and names the JSON keys (the values of `FIELD_MAP`) the answer
    must contain.
    """
    # One "category -> v1, v2, ..." line per known attribute category.
    known_str_joined = "\n".join(
        f"{cat_name} -> {', '.join(sorted(cat_values)) if cat_values else 'none'}"
        for cat_name, cat_values in global_attrs_dict.items()
    )

    # Keys the model has to emit, in FIELD_MAP order.
    required_keys_str = ", ".join(FIELD_MAP.values())

    return f"""
Please carefully analyze the following sentence and identify the object involved:

The sentence describing the interaction is: "{object_sentence}"

Known attribute sets (choose from these if relevant; otherwise, add a new value):
{known_str_joined}

Return the object's attributes in valid JSON format, with the following top-level keys:
{required_keys_str}

"""

##########################################
# NOTE: the main logic (reading the CSV, searching for the best original
# clip, writing the new CSV) lives in the `__main__` section at the bottom
# of this file.
##########################################

#########################################
# Helper functions for clip extraction and target-clip parsing
#########################################
def save_subclip_from_frames(input_video_path: str,
                             output_video_path: str,
                             start_frame: int,
                             end_frame: int,
                             overwrite: bool = False) -> bool:
    """
    Copy frames [start_frame, end_frame] (inclusive) of a video into a new MP4.

    The range is clamped to the frames the input actually has. The output
    uses the "mp4v" codec, the input's frame size, and the input's frame rate
    (30.0 when the input reports none).

    Args:
        input_video_path: Video to read from.
        output_video_path: File to write.
        start_frame: First frame to copy.
        end_frame: Last frame to copy.
        overwrite: When False, an existing output file is left untouched.

    Returns:
        True when at least one frame was written. False when the input is
        missing or cannot be opened, the output exists and `overwrite` is
        False, the frame range is invalid, the writer cannot be opened, or no
        frame could be read.
    """
    if not os.path.exists(input_video_path):
        print(f"[WARN] Input video not found: {input_video_path}")
        return False

    if os.path.exists(output_video_path) and not overwrite:
        print(f"[INFO] Output already exists. Skipping: {output_video_path}")
        return False

    # These checks need no video metadata, so fail before opening anything.
    if end_frame < 0 or start_frame > end_frame:
        print(f"[WARN] Invalid frame range: {start_frame} to {end_frame}")
        return False

    cap = cv2.VideoCapture(input_video_path)
    out_writer = None
    frames_written = 0
    # try/finally releases both handles on every exit path.
    try:
        if not cap.isOpened():
            print(f"[WARN] Could not open input video: {input_video_path}")
            return False

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

        if start_frame >= total_frames:
            print(f"[WARN] Invalid frame range: {start_frame} to {end_frame}, total={total_frames}")
            return False

        first_frame = max(0, start_frame)
        last_frame = min(end_frame, total_frames - 1)
        num_frames = last_frame - first_frame + 1

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
        if not out_writer.isOpened():
            print(f"[WARN] Could not open output video for writing: {output_video_path}")
            return False

        cap.set(cv2.CAP_PROP_POS_FRAMES, first_frame)
        while frames_written < num_frames:
            ret, frame = cap.read()
            if not ret:
                break
            out_writer.write(frame)
            frames_written += 1
    finally:
        cap.release()
        if out_writer is not None:
            out_writer.release()

    if frames_written == 0:
        # An empty clip is useless to callers, so do not report success.
        print(f"[WARN] No frames could be read from {input_video_path} starting at {start_frame}")
        return False
    return True

def process_target_clip(target_clip: str) -> Optional[dict]:
    """
    Split a target clip path into the pieces encoded in it.

    The path is expected to end in
    `<original_video_uid>/<clip_uid>_<start_frame>_<end_frame>.mp4`, using
    either backslashes or forward slashes as separators, e.g.
    `...\\243e800d-...\\2c2bda8d-..._144_176.mp4`.

    Returns:
        A dictionary with the keys "original_video_uid" (name of the parent
        folder), "original_CLIP_uid" (first part of the file name),
        "start_frame" and "end_frame" (ints), or None (after printing a
        `[WARN]` line) when the path does not have that shape.
    """
    # Normalise separators so Windows-style paths also parse on POSIX, where
    # os.path.basename does not treat "\\" as a separator.
    path_parts = [part for part in str(target_clip or "").replace("\\", "/").split("/") if part]
    if len(path_parts) < 2:
        print(f"[WARN] Could not parse target_clip (no parent folder): {target_clip}")
        return None

    base_filename = path_parts[-1]
    original_video_uid = path_parts[-2]

    splits = base_filename.split("_")
    if len(splits) < 3:
        print(f"[WARN] Could not parse target_clip: {base_filename}")
        return None

    original_CLIP_uid = splits[0]  # The first part is the long-term CLIP UID
    try:
        start_frame = int(splits[1])  # Start frame
        end_frame = int(splits[2].replace(".mp4", ""))  # End frame (remove .mp4)
    except ValueError:
        print(f"[WARN] Non-numeric frame range in target_clip: {base_filename}")
        return None

    return {
        "original_video_uid": original_video_uid,
        "original_CLIP_uid": original_CLIP_uid,
        "start_frame": start_frame,
        "end_frame": end_frame
    }

def is_overlapping(start_frame1, end_frame1, start_frame2, end_frame2):
    """Return True when the two inclusive frame ranges share at least one frame."""
    latest_start = max(start_frame1, start_frame2)
    earliest_end = min(end_frame1, end_frame2)
    # The ranges intersect exactly when the later start is not past the earlier end.
    return latest_start <= earliest_end


# Columns this script appends to every row of the input CSV.
_RESULT_COLUMNS = ["original_video_CLIP", "original_object_attribute"]

# Folder holding the full-length original videos, one "<video_uid>.mp4" each.
_FULL_SCALE_DIR = r".\full_scale"

# Number of frames extracted around the best-matching narration timestamp.
_CLIP_NUM_FRAMES = 30


def _dump_json(json_path, payload):
    """Write `payload` to `json_path` as indented UTF-8 JSON."""
    with open(json_path, "w", encoding="utf-8") as f_out:
        json.dump(payload, f_out, indent=2, ensure_ascii=False)


def _write_rows_csv(csv_path, fieldnames, rows):
    """Write the row dictionaries to `csv_path`, preceded by a header row."""
    with open(csv_path, "w", newline='', encoding="utf-8") as f_out:
        writer = csv.DictWriter(f_out, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def _row_key(row):
    """Return the (person_id, query, target_clip) tuple that identifies a CSV row."""
    return (row["person_id"], row["query"], row["target_clip"])


def _collect_narrations(video_narration_obj):
    """Concatenate the narrations of both narration passes of one video."""
    narrations = []
    for pass_name in ("narration_pass_1", "narration_pass_2"):
        if pass_name in video_narration_obj:
            narrations.extend(video_narration_obj[pass_name].get("narrations", []))
    return narrations


def _save_checkpoints(output_dir, narration_data, video_uid_cache, global_attrs):
    """Persist the three JSON checkpoint files that are read back on start-up."""
    _dump_json(os.path.join(output_dir, "GPT_narration_tmp.json"), narration_data)
    _dump_json(os.path.join(output_dir, "video_uid_cache_tmp.json"), video_uid_cache)
    # Sets are not JSON-serialisable; store each category as a sorted list.
    _dump_json(
        os.path.join(output_dir, "sets_as_dict_tmp.json"),
        {cat: sorted(values, key=str) for cat, values in global_attrs.items()},
    )


def _precompute_video_attributes(original_video_path, narrations, engine, global_attrs):
    """
    Run attribute detection once per narration of a video.

    Returns a dict mapping `str(timestamp_frame)` to the detected attributes.
    Keys are strings because that is what they become after a JSON round trip
    through the cache checkpoint.
    """
    video_attrs = {}
    total_nar = len(narrations)
    gpt_start_time = time.time()

    for nar_idx, nar in enumerate(narrations):
        ts_frame = nar.get("timestamp_frame", None)
        nar_text = nar.get("narration_text", "")
        print(nar_text)
        if ts_frame is None:
            continue

        frame_np = get_mid_frame_as_np_array(original_video_path, ts_frame, num_frames=_CLIP_NUM_FRAMES)
        if frame_np is None:
            continue

        cand_attr = detect_object_attributes(
            api_client=cloudgpt_api,
            object_sentence=nar_text,
            image_data_url=frame_np,
            engine=engine,
            global_attrs=global_attrs
        )
        if not cand_attr:
            continue

        video_attrs[str(ts_frame)] = cand_attr

        # Rough progress / ETA report on every other narration.
        if nar_idx % 2 == 0:
            nar_done = nar_idx + 1
            nar_avg_time = (time.time() - gpt_start_time) / nar_done
            nar_predicted_remaining = nar_avg_time * (total_nar - nar_done)
            print(f"[INFO] Processed {nar_done} / {total_nar} rows. "
                  f"Estimated remaining time: ~{nar_predicted_remaining / 3600:.2f} hours.")

    return video_attrs


def _match_row_to_original_clip(row, narration_data, video_uid_cache, global_attrs, output_dir, engine):
    """
    Find the narration of the original video whose object best matches the row.

    Returns `(clip_path, attributes_json)`. Both are "" when no narration
    scores above zero or the row cannot be resolved; `clip_path` alone is ""
    when a match was found but the clip could not be written.
    """
    target_clip = row["target_clip"]
    obj = row["object"]  # e.g., "bottle"
    target_object_attr = row["object_attribute"]  # JSON string of the target's attributes

    # (A) Identify the original video from the target clip path.
    clip_info = process_target_clip(target_clip)
    if not clip_info:
        return "", ""
    original_video_uid = clip_info["original_video_uid"]

    if original_video_uid not in narration_data:
        print(f"[WARN] No narration data for video_uid={original_video_uid}")
        return "", ""
    narrow_narrs = _collect_narrations(narration_data[original_video_uid])

    original_video_path = os.path.join(_FULL_SCALE_DIR, f"{original_video_uid}.mp4")
    if not os.path.exists(original_video_path):
        print(f"[WARN] Original video not found: {original_video_path}")
        return "", ""

    # (B) Detect attributes for every narration of this video, once per video.
    print(f"Processing {original_video_uid}...")
    if original_video_uid not in video_uid_cache:
        print(f"Precomputing attributes for {original_video_uid}...")
        video_uid_cache[original_video_uid] = _precompute_video_attributes(
            original_video_path, narrow_narrs, engine, global_attrs
        )

        print(f"Updating narration data for {original_video_uid}...")
        fresh_attrs = video_uid_cache[original_video_uid]
        for nar in narrow_narrs:
            ts_frame = nar.get("timestamp_frame", None)
            # `is not None` rather than truthiness: frame 0 is a valid timestamp.
            if ts_frame is not None and str(ts_frame) in fresh_attrs:
                nar['object_attributes'] = fresh_attrs[str(ts_frame)]

        # The GPT calls are the expensive part, so checkpoint right after them.
        _save_checkpoints(output_dir, narration_data, video_uid_cache, global_attrs)
        print(f"[INFO] Wrote checkpoint files to:\n {output_dir}")

    # (C) Compare the cached attributes with the target's attributes.
    print(f"Comparing attributes for {original_video_uid}...")
    video_attrs = video_uid_cache[original_video_uid]
    best_score = 0.0  # only strictly positive scores count as a match
    best_ts_frame = None
    best_attr_dict = {}

    for nar in narrow_narrs:
        ts_frame = nar.get("timestamp_frame", None)
        if ts_frame is None:
            continue
        cand_attr = video_attrs.get(str(ts_frame))
        if not isinstance(cand_attr, dict):
            continue

        # Similarity is only computed when the object names match.
        candidate_object = str(cand_attr.get("subcategory") or "")
        score = compute_attribute_similarity(target_object_attr, cand_attr, obj, candidate_object)
        print(f"Target: {obj} \n Score for candidate {candidate_object}: {score}")

        if score > best_score:
            best_score = score
            best_ts_frame = ts_frame
            best_attr_dict = cand_attr

    if best_ts_frame is None:
        return "", ""

    # (D) Cut the clip around the best narration next to the target clip.
    parent_dir = os.path.dirname(target_clip)
    full_clip_path = create_full_clip(original_video_path, parent_dir, best_ts_frame,
                                      num_frames=_CLIP_NUM_FRAMES,
                                      original_video_uid=original_video_uid)
    attributes_json = json.dumps(best_attr_dict)
    if not full_clip_path:
        # Keep the row in the output; the empty clip column marks it for a
        # retry on the next resume.
        print(f"[WARN] Could not create clip for {original_video_uid} at frame {best_ts_frame}")
        return "", attributes_json

    print(f"Best clip for {original_video_uid}: {full_clip_path}")
    return full_clip_path, attributes_json


if __name__ == "__main__":
    output_dir = args.output_pth
    # Every checkpoint and result file is written here, so it must exist.
    os.makedirs(output_dir, exist_ok=True)

    # -------------------------------------------------------------------------
    # 0) Try to load any existing checkpoint files to resume progress
    # -------------------------------------------------------------------------
    tmp_output_csv_path    = os.path.join(output_dir, "final_outputs_v2_tmp.csv")
    tmp_video_cache_path   = os.path.join(output_dir, "video_uid_cache_tmp.json")
    tmp_narration_path     = os.path.join(output_dir, "GPT_narration_tmp.json")
    tmp_sets_as_dict_path  = os.path.join(output_dir, "sets_as_dict_tmp.json")

    # Rows of a previous run, by row key. Only rows with a non-empty
    # original_video_CLIP count as processed; the others are recomputed.
    partial_row_lookup = {}
    processed_row_keys = set()
    if os.path.exists(tmp_output_csv_path):
        print(f"[INFO] Found checkpoint CSV: {tmp_output_csv_path}. Resuming from checkpoint.")
        with open(tmp_output_csv_path, "r", encoding="utf-8", newline='') as f_in:
            for row in csv.DictReader(f_in):
                row_key = _row_key(row)
                partial_row_lookup[row_key] = row
                if row.get("original_video_CLIP"):
                    processed_row_keys.add(row_key)

    video_uid_cache = {}
    if os.path.exists(tmp_video_cache_path):
        print(f"[INFO] Found checkpoint video_uid_cache: {tmp_video_cache_path}. Resuming from checkpoint.")
        with open(tmp_video_cache_path, "r", encoding="utf-8") as f_in:
            video_uid_cache = json.load(f_in)

    if os.path.exists(tmp_narration_path):
        print(f"[INFO] Found checkpoint narration data: {tmp_narration_path}. Resuming from checkpoint.")
        with open(tmp_narration_path, "r", encoding="utf-8") as f_in:
            narration_data = json.load(f_in)
    else:
        print("[INFO] Loading narration data fresh...")
        narration_json_path = os.path.join(args.dataset_path, 'narration.json')
        with open(narration_json_path, "r", encoding="utf-8") as f_na:
            narration_data = json.load(f_na)

    # -------------------------------------------------------------------------
    # 1) Load global attributes from sets_as_dict_tmp.json if it exists,
    #    else from the original args.attributes_json
    # -------------------------------------------------------------------------
    if os.path.exists(tmp_sets_as_dict_path):
        print(f"[INFO] Found {tmp_sets_as_dict_path}, loading global attributes from checkpoint...")
        attrs_source_path = tmp_sets_as_dict_path
    else:
        print(f"[INFO] No {tmp_sets_as_dict_path} found. Loading from {args.attributes_json} instead...")
        attrs_source_path = args.attributes_json
    with open(attrs_source_path, "r", encoding="utf-8") as f_ga:
        sets_tmp = json.load(f_ga)

    # JSON stores each category as a list; work with sets in memory.
    global_attrs = {cat_field: set(cat_values) for cat_field, cat_values in sets_tmp.items()}

    # -------------------------------------------------------------------------
    # 2) Read the input CSV
    # -------------------------------------------------------------------------
    input_csv_path = args.input_csv
    print(f"[INFO] Reading input CSV: {input_csv_path}")
    with open(input_csv_path, "r", encoding="utf-8", newline='') as f_in:
        reader = csv.DictReader(f_in)
        all_rows = list(reader)
        # Taken from the header so that an input without data rows still works.
        input_fields = list(reader.fieldnames or [])

    fieldnames = input_fields + [col for col in _RESULT_COLUMNS if col not in input_fields]

    # -------------------------------------------------------------------------
    # 3) Main loop: the output keeps the order of the input CSV
    # -------------------------------------------------------------------------
    updated_rows = []
    total_rows = len(all_rows)
    start_time = time.time()
    rows_done_this_run = 0

    for idx, row in enumerate(all_rows):
        row_key = _row_key(row)

        if row_key in processed_row_keys:
            print(f"[INFO] Row {idx} is already processed. Skipping.")
            # Reuse the checkpointed result instead of recomputing it.
            updated_rows.append(partial_row_lookup[row_key])
            continue

        print(f"\n[INFO] Processing row {idx+1}/{total_rows}: {row_key}")

        clip_path, attributes_json = _match_row_to_original_clip(
            row, narration_data, video_uid_cache, global_attrs, output_dir, args.openai_engine
        )
        row["original_video_CLIP"] = clip_path
        row["original_object_attribute"] = attributes_json
        updated_rows.append(row)
        rows_done_this_run += 1

        # Checkpoint the CSV after every row so an interrupted run can resume.
        _write_rows_csv(tmp_output_csv_path, fieldnames, updated_rows)
        print(f"[INFO] Wrote updated CSV with {len(updated_rows)} rows:\n {tmp_output_csv_path}")

        avg_time_per_row = (time.time() - start_time) / rows_done_this_run
        remaining_rows = total_rows - (idx + 1)
        predicted_remaining_hrs = remaining_rows * avg_time_per_row / 3600.0
        print(f"[INFO] Processed {idx + 1} / {total_rows} rows. "
              f"Estimated remaining time: ~{predicted_remaining_hrs:.2f} hours.")

    # -------------------------------------------------------------------------
    # 4) Write the final CSV and the final narration data
    # -------------------------------------------------------------------------
    output_csv_path = os.path.join(output_dir, "egomemory.csv")
    _write_rows_csv(output_csv_path, fieldnames, updated_rows)

    output_narration_path = os.path.join(output_dir, "GPT_narration.json")
    _dump_json(output_narration_path, narration_data)
